% ---- Experiment configuration -------------------------------------------
% m, d : size of the random matrix A (m rows, d columns)
% p    : parameter forwarded to loss_function / loss_gradient / loss_hessian
%        (presumably the exponent of the loss -- confirm in those helpers)
% eta  : constant step size used by the fixed-step gradient descent run
m   = 100;
d   = 10;
p   = 4;
eta = 1e-4;
% Alternative configurations (uncomment exactly one to replace the above):
%m = 100; d = 10; p = 10; eta = 1e-8;
%m = 2000; d = 1000; p = 4; eta = 1e-12;
%m = 2000; d = 1000; p = 10; eta = 1e-15;

% Number of iterations every method is run for.
iterations = 100;

% ---- Synthetic problem ----------------------------------------------------
% b is built from a known x_opt, so A*x_opt reproduces b exactly and x_opt
% serves as the reference point for all error curves.
% NOTE: the order of the rand calls is kept (A, x_opt, x_0) so that the
% random stream is consumed in the same order as before.
A     = rand(m, d);
x_opt = rand(d, 1);
b     = A*x_opt;

% Common starting point shared by all methods.
x_0 = rand(d, 1);
x   = x_0;

% ---- Newton's method ------------------------------------------------------
% y_newton(j) holds ||x - x_opt|| after j-1 Newton steps (entry 1 is the
% error of the starting point x_0). The iteration continues from the current
% value of x in the workspace.
y_newton = zeros(1, iterations + 1);
y_newton(1) = norm(x_0 - x_opt);

for k = 1:iterations
    hess = loss_hessian(A, b, x, p);
    grad = loss_gradient(A, b, x, p);
    % Solve hess * step = grad with mldivide instead of forming inv(hess).
    newton_step = hess\grad;
    x = x - newton_step;
    y_newton(k + 1) = norm(x - x_opt);
end

% ---- BFGS (inverse-Hessian form, unit step) -------------------------------
% y_bfgs(j) holds ||x - x_opt|| after j-1 BFGS steps, starting from x_0.
% H approximates the INVERSE Hessian; it is initialised with the exact
% inverse Hessian at x_0, so the first step coincides with a Newton step.
y_bfgs = zeros(1, iterations + 1);
y_bfgs(1) = norm(x_0 - x_opt);
x = x_0;
H = inv(loss_hessian(A, b, x, p));

% The gradient at the current iterate is carried across iterations so that
% each point is differentiated only once (the gradient at x_new is exactly
% the "current" gradient of the next iteration).
g = loss_gradient(A, b, x, p);

for k = 1:iterations
    x_new = x - H*g;
    g_new = loss_gradient(A, b, x_new, p);

    s = x_new - x;      % step taken
    y = g_new - g;      % change in gradient along the step
    t = 1.0/(s'*y);     % curvature scaling 1/(s'*y)

    % Expanded form of the BFGS inverse update
    %   H+ = (I - t*s*y') * H * (I - t*y*s') + t*s*s'
    % written without forming the identity matrix. G' equals t*s*(y'*H)
    % provided H is symmetric, which the update itself preserves.
    G = t*(H*y)*s';
    K = s*s';
    H = H - G' - G + (t^2*(y'*H*y) + t)*K;

    x = x_new;
    g = g_new;
    y_bfgs(k + 1) = norm(x - x_opt);
end

% ---- Gradient descent with constant step size eta -------------------------
% y_gd(j) holds ||x - x_opt|| after j-1 gradient steps, starting from x_0.
y_gd = zeros(1, iterations + 1);
y_gd(1) = norm(x_0 - x_opt);
x = x_0;

for k = 1:iterations
    descent_dir = loss_gradient(A, b, x, p);
    x = x - eta*descent_dir;
    y_gd(k + 1) = norm(x - x_opt);
end

% ---- Gradient descent with an adaptive (Polyak-type) step size ------------
% y_gdp(j) holds ||x - x_opt|| after j-1 steps, starting from x_0.
% The step length is loss(x) / ||grad(x)||^2, i.e. the Polyak rule with the
% optimal loss value taken to be 0 (presumably true here because
% b = A*x_opt -- confirm against loss_function).
%
% The per-iteration step is stored in eta_polyak rather than eta so that the
% constant step size configured at the top of the script is not overwritten
% (re-running the constant-step section afterwards would otherwise silently
% use the last Polyak step).
y_gdp = zeros(1, iterations + 1);
y_gdp(1) = norm(x_0 - x_opt);
x = x_0;

for k = 1:iterations
    g = loss_gradient(A, b, x, p);
    eta_polyak = loss_function(A, b, x, p)/(norm(g)^(2));
    x = x - eta_polyak*g;
    y_gdp(k + 1) = norm(x - x_opt);
end

% ---- Plot the error curves on a logarithmic y-axis ------------------------
% Horizontal axis: iteration counter 0..iterations (0 is the starting point).
% Note: x is reused here as the plotting abscissa and no longer holds an
% iterate after this line.
x = 0:iterations;

% One entry per method, drawn in this order: Newton, BFGS, GD constant,
% GD Polyak (the legend below relies on this order).
curves     = {y_newton, y_bfgs, y_gd, y_gdp};
line_specs = {'r', 'b', 'g', 'k'};

for c = 1:numel(curves)
    semilogy(x, curves{c}, line_specs{c},'LineWidth',5);
    % hold is switched on only AFTER the first semilogy call so the axes
    % keep their logarithmic y-scale; repeating it afterwards is harmless.
    hold on
end

legend({'\textbf{Newton}', '\textbf{BFGS}', '\textbf{GD constant}', '\textbf{GD Polyak}'},'Interpreter','latex','fontsize',30, 'location', 'southwest')

% Figure and axes cosmetics.
set(gcf,'position',[0,0,600,500])
set(gca,'FontSize', 20, 'fontweight','bold')
xlabel('\textbf{Number of Iteration} ${\boldmath{k}}$','Interpreter','latex','fontsize',40)
ylabel('\textbf{log}$\|${\boldmath${\theta_k - \hat{\theta}}$}$\|$','Interpreter','latex','fontsize',40)

% Fixed y-range so runs with different settings are visually comparable.
ylim([1e-6 1e2])
grid on
grid minor
hold off